library(tidyverse)
library(sf)
library(raster)
library(kableExtra)
library(tidycensus)
library(tigris)
library(FNN)
library(caret)
library(yardstick)
library(plotROC)
library(ggrepel)
library(pROC)
library(grid)
library(gridExtra)
library(viridis)
library(igraph)
library(mapview)
library(FedData)
library(terra)
library(classInt) # For classification
library(mapview) # For interactive maps
library(ggplot2)
library(knitr)
library(ROCR)
library(mapedit)
palette2 <- c("#41b6c4","#253494")
palette4 <- c("#a1dab4","#41b6c4","#2c7fb8","#253494")
palette5 <- c("#ffffcc","#a1dab4","#41b6c4","#2c7fb8","#253494")
palette10 <- c("#f7fcf0","#e0f3db","#ccebc5","#a8ddb5","#7bccc4",
"#4eb3d3","#2b8cbe","#0868ac","#084081","#f7fcf0")
# Custom helper functions used in urban growth modeling
quintileBreaks <- function(df,variable) {
as.character(quantile(df[[variable]],
c(.01,.2,.4,.6,.8),na.rm=T))
}
xyC <- function(aPolygonSF) {
as.data.frame(
cbind(x=st_coordinates(st_centroid(aPolygonSF))[,1],
y=st_coordinates(st_centroid(aPolygonSF))[,2]))
}
rast <- function(inRaster) {
data.frame(
xyFromCell(inRaster, 1:ncell(inRaster)),
value = getValues(inRaster)) }
nn_function <- function(measureFrom,measureTo,k) {
measureFrom_Matrix <- as.matrix(measureFrom)
measureTo_Matrix <- as.matrix(measureTo)
nn <- get.knnx(measureTo, measureFrom, k)$nn.dist
output <- as.data.frame(nn) %>%
rownames_to_column(var = "thisPoint") %>%
gather(points, point_distance, V1:ncol(.)) %>%
arrange(as.numeric(thisPoint)) %>%
group_by(thisPoint) %>%
summarize(pointDistance = mean(point_distance)) %>%
arrange(as.numeric(thisPoint)) %>%
dplyr::select(-thisPoint) %>%
pull()
return(output)
}
aggregateRaster <- function(inputRasterList, theFishnet) {
theseFishnets <- theFishnet %>% dplyr::select()
for (i in inputRasterList) {
varName <- names(i)
thesePoints <-
rasterToPoints(i) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(theFishnet)) %>%
filter(.[[1]] == 1)
thisFishnet <-
aggregate(thesePoints, theFishnet, length) %>%
mutate(!!varName := ifelse(is.na(.[[1]]),0,1))
theseFishnets <- cbind(theseFishnets,thisFishnet)
}
return(theseFishnets)
}
#import land cover data in 2011 and 2021
lc_2011 <- terra::rast("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/data/land cover/2011.tif")
lc_2021 <- terra::rast("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/data/land cover/2021.tif")
# import road data in 2011 and 2021
roads_2011 <- vect("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/tl_2011_48_pri_FeaturesToJSO.json")
roads_2021 <- vect("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/tl_2021_48_pri_FeaturesToJSO.json")
# MSA boundary
msa_boundary <- vect("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/bastrop_county_FeaturesToJSO.json")
msa_boundary <- project(msa_boundary, crs(lc_2011))
roads_2011 <- project(roads_2011, crs(lc_2011))
roads_2021 <- project(roads_2021, crs(lc_2011))
# Clip raster and vectors to Austin MSA boundary
lc_2011_austin <- mask(crop(lc_2011, msa_boundary), msa_boundary)
lc_2021_austin <- mask(crop(lc_2021, msa_boundary), msa_boundary)
roads_2011_austin <- crop(roads_2011, msa_boundary)
roads_2021_austin <- crop(roads_2021, msa_boundary)
austinMSA <- msa_boundary %>% sf::st_as_sf()
austinMSA <- st_transform(austinMSA, 2278) # NAD83 / Texas Central (ftUS)
# Convert terra SpatRaster to raster format (if needed for compatibility)
lc_2011_r <- raster::raster(lc_2011_austin)
lc_2021_r <- raster::raster(lc_2021_austin)
# Resample to 30x coarser resolution using modal value (for categorical data)
lc_2011_rs <- aggregate(lc_2011_r, fact = 30, fun = "modal")
lc_2021_rs <- aggregate(lc_2021_r, fact = 30, fun = "modal")
# Reclassify to Developed (1) vs. Undeveloped (0)
# Define reclassification: Developed classes = 13 to 24
reclassMatrix <- matrix(c(
0, 12, 0,
12, 24, 1,
24, Inf, 0),
ncol = 3, byrow = TRUE
)
# Reclassify each land cover raster
developed_2011 <- reclassify(lc_2011_rs, reclassMatrix)
developed_2021 <- reclassify(lc_2021_rs, reclassMatrix)
# Detect Development Change
# Add rasters together: 0 = undeveloped both, 1 = changed, 2 = developed both
development_change <- developed_2011 + developed_2021
# Histogram to inspect values
hist(development_change, main = "Land Cover Change (2011–2021)",
xlab = "0: No Dev | 1: Change | 2: Dev Both", col = "lightblue")
# Isolate Only Changed Cells (value = 1)
# Set values that are NOT 1 (i.e., no change or dev both) to NA
development_change[development_change != 1] <- NA
# Visualize Land Cover Change
mapView(development_change, layer.name = "Development Change (2011–2021)")
# Create fishnet grid over the Austin MSA at same resolution & CRS as development raster
# Convert msa_boundary to sf format
msa_boundary_sf <- sf::st_as_sf(msa_boundary)
# Then create the fishnet
austin_fishnet <-
st_make_grid(msa_boundary_sf %>%
st_transform(crs(development_change)),
cellsize = res(development_change)[1],
square = TRUE) %>%
st_sf() %>%
st_intersection(
msa_boundary_sf %>%
dplyr::select(geometry) %>%
st_transform(crs(development_change))
) %>%
mutate(uniqueID = rownames(.))
# Reclassify NLCD codes into new land cover categories
developed_2011 <- lc_2011_rs %in% c(21, 22, 23, 24)
forest_2011 <- lc_2011_rs %in% c(41, 42, 43)
farm_2011 <- lc_2011_rs %in% c(81, 82)
wetlands_2011 <- lc_2011_rs %in% c(90, 95)
otherUndeveloped_2011 <- lc_2011_rs %in% c(52, 71, 31)
water_2011 <- lc_2011_rs == 11
developed_2021 <- lc_2021_rs %in% c(21, 22, 23, 24)
forest_2021 <- lc_2021_rs %in% c(41, 42, 43)
farm_2021 <- lc_2021_rs %in% c(81, 82)
wetlands_2021 <- lc_2021_rs %in% c(90, 95)
otherUndeveloped_2021 <- lc_2021_rs %in% c(52, 71, 31)
water_2021 <- lc_2021_rs == 11
# Assign names for aggregation
names(developed_2011) <- "developed_2011"
names(forest_2011) <- "forest_2011"
names(farm_2011) <- "farm_2011"
names(wetlands_2011) <- "wetlands_2011"
names(otherUndeveloped_2011) <- "otherUndeveloped_2011"
names(water_2011) <- "water_2011"
names(developed_2021) <- "developed_2021"
names(forest_2021) <- "forest_2021"
names(farm_2021) <- "farm_2021"
names(wetlands_2021) <- "wetlands_2021"
names(otherUndeveloped_2021) <- "otherUndeveloped_2021"
names(water_2021) <- "water_2021"
# Create raster lists
rasterList_2011 <- c(developed_2011, forest_2011, farm_2011,
wetlands_2011, otherUndeveloped_2011, water_2011)
rasterList_2021 <- c(developed_2021, forest_2021, farm_2021,
wetlands_2021, otherUndeveloped_2021, water_2021)
# Aggregate 2011 land cover categories to fishnet
lcRasters_2011 <- aggregateRaster(rasterList_2011, austin_fishnet) %>%
dplyr::select(developed_2011,
forest_2011,
farm_2011,
wetlands_2011,
otherUndeveloped_2011,
water_2011) %>%
mutate_if(is.numeric, as.factor)
# Aggregate 2021 land cover categories to fishnet (if needed for visual/validation)
lcRasters_2021 <- aggregateRaster(rasterList_2021, austin_fishnet) %>%
dplyr::select(developed_2021,
forest_2021,
farm_2021,
wetlands_2021,
otherUndeveloped_2021,
water_2021) %>%
mutate_if(is.numeric, as.factor)
# Convert raster to points, then join to fishnet
changePoints <- rasterToPoints(development_change) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(austin_fishnet))
# Summarize change points to each fishnet cell
fishnet <- aggregate(changePoints, austin_fishnet, FUN = sum) %>%
mutate(development_change = ifelse(is.na(layer), 0, 1),
development_change = as.factor(development_change)) %>%
dplyr::select(-layer)
# First, create a new base fishnet with uniqueID (if you haven't)
fishnet <- austin_fishnet %>%
mutate(uniqueID = as.character(rownames(.)))
# Then, make sure lcRasters_2011 has the same uniqueID column in the same order
lcRasters_2011 <- lcRasters_2011 %>%
mutate(uniqueID = as.character(rownames(.)))
# Now you can safely join
fishnet <- left_join(fishnet, st_drop_geometry(lcRasters_2011), by = "uniqueID")
# Plot land cover types in 2011 to verify aggregation
lcRasters_2011 %>%
st_centroid() %>%
gather(key = "variable", value = "value", developed_2011:water_2011) %>%
mutate(X = xyC(.)$x, Y = xyC(.)$y) %>%
ggplot() +
geom_sf(data = austinMSA) +
geom_point(aes(X, Y, colour = as.factor(value))) +
facet_wrap(~variable) +
scale_colour_manual(values = palette2,
labels = c("Other", "Land Cover"),
name = "") +
labs(title = "Land Cover Types, 2011 (Austin Fishnet)",
subtitle = "As fishnet centroids") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
### This 6-panel map displays the spatial distribution of major land
cover types within the Austin metropolitan statistical area as of 2011,
mapped to a fishnet grid structure. Categories include developed land,
farmland, forest, other undeveloped areas, wetlands, and water. These
maps provide a baseline understanding of landscape composition and land
availability prior to urban expansion, supporting analysis of conversion
patterns and suitability for future development.
# Tract Boundaries
tracts_2011 <- st_read("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/2011_census_tract.json")
## Reading layer `2011_census_tract' from data source
## `/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/2011_census_tract.json'
## using driver `ESRIJSON'
## Simple feature collection with 5265 features and 13 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -106.6456 ymin: 25.83716 xmax: -93.50804 ymax: 36.5007
## Geodetic CRS: NAD83
tracts_2021 <- st_read("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/2021_census_tract.json")
## Reading layer `2021_census_tract' from data source
## `/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/2021_census_tract.json'
## using driver `ESRIJSON'
## Simple feature collection with 6896 features and 13 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -106.6456 ymin: 25.83716 xmax: -93.50804 ymax: 36.5007
## Geodetic CRS: NAD83
# Define FIPS codes for your counties
austin_fips <- c("48453", "48491", "48209", "48021", "48055")
# Filter tract shapefiles to only Austin MSA counties
tracts_2011 <- tracts_2011 %>% filter(substr(GEOID, 1, 5) %in% austin_fips)
tracts_2021 <- tracts_2021 %>% filter(substr(GEOID, 1, 5) %in% austin_fips)
library(dplyr)
library(readr)
library(stringr)
# Define the directory and file patterns
pop_dir <- "/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/pop"
# Define the county names and years
counties <- c("travis", "williamson", "hays", "bastrop", "caldwell")
years <- c("2011", "2021")
read_clean_pop <- function(county, year) {
file_path <- file.path(pop_dir, paste0(county, "_", year, ".csv"))
read_csv(file_path, skip = 1, show_col_types = FALSE) %>%
filter(str_starts(Geography, "1400000US")) %>% # ✅ filter real tract rows
transmute(
NAME = `Geographic Area Name`,
GEOID_raw = Geography,
pop = as.numeric(`Estimate!!Total`),
GEOID = str_extract(Geography, "\\d{11}$"), # Only final 11 digits
county = str_to_title(county),
year = year
)
}
# Read and bind data for each year
pop2011_all <- bind_rows(lapply(counties, read_clean_pop, year = "2011"))
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...5`
pop2021_all <- bind_rows(lapply(counties, read_clean_pop, year = "2021"))
## New names:
## New names:
## New names:
## New names:
## New names:
## • `` -> `...5`
tracts_2011 <- left_join(tracts_2011, pop2011_all, by = "GEOID")
tracts_2021 <- left_join(tracts_2021, pop2021_all, by = "GEOID")
# Custom palette
palette5 <- c("#ffffcc", "#a1dab4", "#41b6c4", "#2c7fb8", "#253494")
# Plot for 2011
p2011 <- ggplot() +
geom_sf(data = tracts_2011, aes(fill = factor(ntile(pop, 5))), color = NA) +
scale_fill_manual(values = palette5,
labels = quantile(tracts_2011$pop, probs = seq(0, 1, 0.2), na.rm = TRUE),
name = "Quintile\nBreaks") +
labs(title = "Population, Austin MSA: 2011") +
theme_void()
# Plot for 2021
p2021 <- ggplot() +
geom_sf(data = tracts_2021, aes(fill = factor(ntile(pop, 5))), color = NA) +
scale_fill_manual(values = palette5,
labels = quantile(tracts_2021$pop, probs = seq(0, 1, 0.2), na.rm = TRUE),
name = "Quintile\nBreaks") +
labs(title = "Population, Austin MSA: 2021") +
theme_void()
# Display side by side
gridExtra::grid.arrange(p2011, p2021, ncol = 2)
### This figure compares population distribution across the Austin
metropolitan area in 2011 and 2021, classified by quintiles. Darker
shades represent census tracts with higher population counts. The maps
visually capture demographic shifts over the decade, particularly
increased density in central Austin and surrounding high-growth zones.
These patterns are critical for understanding development pressure and
guiding infrastructure planning.
# Make sure tracts are in the same CRS as the fishnet
tracts_2011 <- st_transform(tracts_2011, st_crs(austin_fishnet))
tracts_2021 <- st_transform(tracts_2021, st_crs(austin_fishnet))
# Interpolate tract population data to fishnet grid for 2011
fishnetPopulation2011 <-
st_interpolate_aw(tracts_2011["pop"], austin_fishnet, extensive = TRUE) %>%
st_centroid() %>%
st_join(austin_fishnet, ., join = st_intersects) %>%
mutate(pop_2011 = replace_na(pop, 0)) %>%
dplyr::select(pop_2011)
## Warning in st_interpolate_aw.sf(tracts_2011["pop"], austin_fishnet, extensive =
## TRUE): st_interpolate_aw assumes attributes are constant or uniform over areas
## of x
## Warning: st_centroid assumes attributes are constant over geometries
# Interpolate tract population data to fishnet grid for 2021
fishnetPopulation2021 <-
st_interpolate_aw(tracts_2021["pop"], austin_fishnet, extensive = TRUE) %>%
st_centroid() %>%
st_join(austin_fishnet, ., join = st_intersects) %>%
mutate(pop_2021 = replace_na(pop, 0)) %>%
dplyr::select(pop_2021)
## Warning in st_interpolate_aw.sf(tracts_2021["pop"], austin_fishnet, extensive =
## TRUE): st_interpolate_aw assumes attributes are constant or uniform over areas
## of x
## Warning in st_interpolate_aw.sf(tracts_2021["pop"], austin_fishnet, extensive =
## TRUE): st_centroid assumes attributes are constant over geometries
library(gridExtra)
# Plot: 2011 Tract Population
pop_plot_tract_2011 <- ggplot() +
geom_sf(data = tracts_2011, aes(fill = factor(ntile(pop, 5))), color = NA) +
scale_fill_manual(
values = palette5,
labels = substr(quintileBreaks(tracts_2011, "pop"), 1, 4),
name = "Quintile\nBreaks"
) +
labs(
title = "Population, Austin MSA: 2011",
subtitle = "Represented as census tracts"
) +
theme_void()
# Plot: 2011 Fishnet Interpolated Population
pop_plot_fishnet_2011 <- ggplot() +
geom_sf(data = fishnetPopulation2011, aes(fill = factor(ntile(pop_2011, 5))), color = NA) +
scale_fill_manual(
values = palette5,
labels = substr(quintileBreaks(fishnetPopulation2011, "pop_2011"), 1, 4),
name = "Quintile\nBreaks"
) +
labs(
title = "Population, Austin MSA: 2011",
subtitle = "Represented as interpolated fishnet grid"
) +
theme_void()
# Combine the two plots
grid.arrange(pop_plot_tract_2011, pop_plot_fishnet_2011, ncol = 2)
### fugure. Population Distribution, Austin MSA (2011): Census Tracts
vs. Interpolated Grid
# Visualize 2021 Tract vs Fishnet Interpolated Population
grid.arrange(
ggplot() +
geom_sf(data = tracts_2021, aes(fill = factor(ntile(pop, 5))), colour = NA) +
scale_fill_manual(values = palette5,
labels = substr(quintileBreaks(tracts_2021, "pop"), 1, 4),
name = "Quintile\nBreaks") +
labs(title = "Population, Austin MSA: 2021",
subtitle = "Represented as Tracts (raw)") +
theme_void(),
ggplot() +
geom_sf(data = fishnetPopulation2021, aes(fill = factor(ntile(pop_2021, 5))), colour = NA) +
scale_fill_manual(values = palette5,
labels = substr(quintileBreaks(fishnetPopulation2021, "pop_2021"), 1, 4),
name = "Quintile\nBreaks") +
labs(title = "Population, Austin MSA: 2021",
subtitle = "Interpolated to Fishnet Grid") +
theme_void(),
ncol = 2
)
### figure. Population Distribution, Austin MSA (2021): Census Tracts
vs. Interpolated Grid
# 7.1 Define and subset highways for Austin MSA
austinHighways <- roads_2011 %>%
st_as_sf() %>%
st_transform(st_crs(austinMSA)) %>%
st_intersection(st_geometry(austinMSA)) %>%
st_transform(st_crs(austin_fishnet))
## Warning: attribute variables are assumed to be spatially constant throughout
## all geometries
# 7.1 FIX: Make sure development_change exists in fishnet
# Re-run this only if you lost the column previously
# Note: development_change was calculated by summing raster points and aggregating to the grid
# Re-aggregate dev change from raster to fishnet (just to be sure)
changePoints <- rasterToPoints(development_change) %>%
as.data.frame() %>%
st_as_sf(coords = c("x", "y"), crs = st_crs(austin_fishnet))
fishnet <- aggregate(changePoints, austin_fishnet, FUN = sum) %>%
mutate(development_change = ifelse(is.na(layer), 0, 1),
development_change = factor(development_change, levels = c(0,1), labels = c("No Change", "New Development"))) %>%
dplyr::select(-layer)
# 7.2 Visualize development change and highways using geometry-aware `geom_sf()`
ggplot() +
geom_sf(data = fishnet, aes(fill = development_change), color = NA) +
geom_sf(data = austinHighways, color = "black", size = 0.4) +
scale_fill_manual(values = palette2) +
labs(title = "New Development and Highways (2011–2021)",
subtitle = "Austin Fishnet Cells with Primary Roads") +
theme_void()
### the spatial relationship between primary highways and new
development across the Austin Metropolitan Statistical Area (MSA). Using
a fishnet grid, areas marked in dark blue represent new development from
2011 to 2021, overlaid with primary road networks. The visualization
highlights how development tends to cluster around major transportation
corridors, supporting the inclusion of road proximity as a key predictor
in the urban growth model.
# 7.2.1 Calculate Distance from Fishnet Centroids to Highways
highwayPoints_fishnet_2011 <- austin_fishnet %>%
st_centroid() %>%
mutate(distance_highways_2011 = as.numeric(
st_distance(., st_union(austinHighways) %>%
st_transform(st_crs(austin_fishnet)))
)) %>%
as.data.frame() %>%
dplyr::select(-geometry) %>%
left_join(austin_fishnet, ., by = "uniqueID") %>%
st_as_sf()
## Warning: st_centroid assumes attributes are constant over geometries
# 7.2.2 Assume 2021 network = 2011 (duplicate)
highwayPoints_fishnet_2021 <- highwayPoints_fishnet_2011 %>%
rename(distance_highways_2021 = distance_highways_2011)
ggplot() +
geom_sf(data = austinMSA %>% st_transform(st_crs(highwayPoints_fishnet_2011))) +
geom_point(data = highwayPoints_fishnet_2011,
aes(x = xyC(highwayPoints_fishnet_2011)[,1],
y = xyC(highwayPoints_fishnet_2011)[,2],
colour = factor(ntile(distance_highways_2011, 5))),
size = 1.5) +
scale_colour_manual(
values = palette5,
labels = substr(quintileBreaks(highwayPoints_fishnet_2011, "distance_highways_2011"), 1, 8),
name = "Quintile\nBreaks"
) +
geom_sf(data = austinHighways, colour = "red") +
labs(title = "Distance to Highways (ft)",
subtitle = "Fishnet Centroids; Highways visualized in red") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
### Euclidean distance from each fishnet centroid to the nearest primary
highway within the Austin Metropolitan Statistical Area. Red lines
represent major highways, while the color gradient shows increasing
distance from these roads
# 1. Load your new highway file as sf
new_highway <- st_read("/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/proposed_1highway.geojson")
## Reading layer `proposed_1highway' from data source
## `/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/proposed_1highway.geojson'
## using driver `ESRIJSON'
## Simple feature collection with 1 feature and 2 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: -10877710 ymin: 3550515 xmax: -10868470 ymax: 3617404
## Projected CRS: WGS 84 / Pseudo-Mercator
# 2. Ensure it matches CRS of fishnet
new_highway <- st_transform(new_highway, st_crs(austin_fishnet))
# 3. Calculate distance from centroids to the highway
fishnet_centroids <- st_centroid(austin_fishnet)
## Warning: st_centroid assumes attributes are constant over geometries
fishnet_centroids$distance_new_highway <- as.numeric(
st_distance(fishnet_centroids, st_union(new_highway))
)
#section 8
# Objective:
# The code calculates the spatial lag to the nearest two developed cells in 2011 and 2021 from the centroids of a fishnet grid, and then visualizes the 2011 lag in a choropleth-style plot using quintile classification.
# Calculate spatial lag to 2 nearest developed cells in 2011
fishnet$lagDevelopment_2011 <-
nn_function(
fishnet %>%
st_centroid() %>%
st_coordinates() %>%
as.data.frame(),
lcRasters_2011 %>%
filter(developed_2011 == 1) %>%
st_centroid() %>%
st_coordinates() %>%
as.data.frame(),
k = 2
)
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
# Duplicate for 2021 using the same assumption (or adapt if new dev data used)
fishnet$lagDevelopment_2021 <-
nn_function(
fishnet %>%
st_centroid() %>%
st_coordinates() %>%
as.data.frame(),
lcRasters_2021 %>%
filter(developed_2021 == 1) %>%
st_centroid() %>%
st_coordinates() %>%
as.data.frame(),
k = 2
)
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
ggplot() +
geom_sf(data = austinMSA %>% st_transform(st_crs(fishnet))) +
geom_point(data = fishnet,
aes(x = xyC(fishnet)[,1],
y = xyC(fishnet)[,2],
colour = factor(ntile(lagDevelopment_2011, 5))),
size = 1.5) +
scale_colour_manual(values = palette5,
labels = substr(quintileBreaks(fishnet, "lagDevelopment_2011"), 1, 7),
name = "Quintile\nBreaks") +
labs(title = "Spatial Lag to 2011 Development (ft)",
subtitle = "Measured from fishnet centroids to nearest developed cells") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
### the spatial lag to prior urban development in the Austin MSA by
showing the average distance (in feet) from each fishnet centroid to its
two nearest developed cells in 2011. Quintile classification reveals
spatial patterns in proximity, highlighting areas with strong
development momentum (lighter colors) versus more isolated or
undeveloped regions (darker shades). This metric is critical in
understanding contagion effects and diffusion-based urban growth.
# Define the directory containing your county geojson files
county_dir <- "/Users/1m/Library/CloudStorage/OneDrive-PennO365/25spring/planning and env modelling/Austin/r_import/county"
# Define the county file name roots (all lowercase to match filenames)
county_names <- c("bastrop", "caldwell", "hays", "travis", "williamson")
# Function to read and tag each county
read_county <- function(county) {
file_path <- file.path(county_dir, paste0(tools::toTitleCase(county), "_County.geojson"))
st_read(file_path, quiet = TRUE) %>%
mutate(NAME = tools::toTitleCase(county)) # Add county name if not present
}
# Read and bind all counties
studyAreaCounties <- bind_rows(lapply(county_names, read_county)) %>%
st_transform(st_crs(austin_fishnet)) # Match CRS
countyFishnet <- austin_fishnet %>%
st_join(studyAreaCounties %>% dplyr::select(NAME)) %>%
as.data.frame() %>%
dplyr::select(uniqueID, NAME) %>%
left_join(austin_fishnet, .) %>%
st_as_sf() %>%
group_by(uniqueID) %>%
slice(1) %>%
ungroup() %>%
arrange(as.numeric(uniqueID))
## Joining with `by = join_by(uniqueID)`
# Combine t1: 2011-based modeling dataset
dat_2011 <-
cbind(fishnet, highwayPoints_fishnet_2011, fishnetPopulation2011, lcRasters_2011, countyFishnet) %>%
as.data.frame() %>%
dplyr::select(uniqueID, development_change, lagDevelopment_2011, distance_highways_2011, pop_2011,
developed_2011, forest_2011, farm_2011, wetlands_2011, otherUndeveloped_2011, water_2011,
NAME, geometry) %>%
filter(water_2011 == 0) %>%
rename_with(~ str_remove(.x, "_2011"))
# Combine t2: 2021-based forecasting dataset
dat_2021 <-
cbind(fishnet, highwayPoints_fishnet_2021, fishnetPopulation2021, lcRasters_2021, countyFishnet) %>%
as.data.frame() %>%
dplyr::select(uniqueID, development_change, lagDevelopment_2021, distance_highways_2021, pop_2021,
developed_2021, forest_2021, farm_2021, wetlands_2021, otherUndeveloped_2021, water_2021,
NAME, geometry) %>%
filter(water_2021 == 0) %>%
rename_with(~ str_remove(.x, "_2021"))
# ---- Integrate New Highway Distance into 2031 Dataset ----
# Assumes fishnet_newhighway has columns "uniqueID" and "distance_new_highway"
# 4. Create a table with just ID and distance
new_hw_distances <- fishnet_centroids %>%
st_drop_geometry() %>%
dplyr::select(uniqueID, distance_new_highway)
# 5. Join this distance to dat_2021 by uniqueID
dat_2021 <- dat_2021 %>%
left_join(new_hw_distances, by = "uniqueID")
# 6. Confirm success
summary(dat_2021$distance_new_highway)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.39 18989.96 35358.88 35882.37 52790.49 80967.65
dat_2011 %>%
dplyr::select(distance_highways, lagDevelopment, development_change, pop) %>%
gather(Variable, Value, -development_change) %>%
ggplot(aes(x = development_change, y = Value, fill = development_change)) +
geom_bar(position = "dodge", stat = "summary", fun = mean) +
facet_wrap(~Variable, scales = "free") +
scale_fill_manual(values = palette2,
labels = c("No Change", "New Development"),
name = "Mean Value") +
labs(title = "New Development as a Function of Continuous Variables",
x = "Development Change (0 = No, 1 = Yes)",
y = "Mean Value") +
theme_minimal()
### compares the average values of three continuous predictors—distance
to highways, spatial lag to existing development, and population—between
areas that experienced development (1) and those that did not (0) in the
Austin MSA. New development areas tend to have lower distances to
highways and prior development, and higher population values, supporting
the role of accessibility and density in driving urban expansion.
# Convert land cover variables from factor to numeric (0/1)
dat_2011 <- dat_2011 %>%
mutate(
forest = as.numeric(as.character(forest)),
farm = as.numeric(as.character(farm)),
wetlands = as.numeric(as.character(wetlands)),
otherUndeveloped = as.numeric(as.character(otherUndeveloped))
)
dat_2011 %>%
dplyr::select(development_change, forest, farm, wetlands, otherUndeveloped) %>%
gather(key = "Land_Cover_Type", Value, -development_change) %>%
group_by(development_change, Land_Cover_Type) %>%
summarize(n = sum(Value), .groups = 'drop') %>%
ungroup() %>%
mutate(Conversion_Rate = paste0(round(100 * n / sum(n), 2), "%")) %>%
filter(development_change == "New Development") %>%
dplyr::select(Land_Cover_Type, Conversion_Rate) %>%
kable() %>%
kable_styling(full_width = FALSE)
| Land_Cover_Type | Conversion_Rate |
|---|---|
| farm | 0.6% |
| forest | 0.96% |
| otherUndeveloped | 1.97% |
| wetlands | 0.01% |
library(caret)
set.seed(3456)
trainIndex <- createDataPartition(dat_2011$otherUndeveloped, p = 0.7, list = FALSE, times = 1)
datTrain <- dat_2011[trainIndex, ]
datTest <- dat_2011[-trainIndex, ]
Model1 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped,
family = binomial(link = "logit"), data = datTrain)
Model2 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment,
family = binomial(link = "logit"), data = datTrain)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Model3 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop,
family = binomial(link = "logit"), data = datTrain)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Model4 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop + NAME,
family = binomial(link = "logit"), data = datTrain)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Model5 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped + lagDevelopment + pop + distance_highways + NAME,
family = binomial(link = "logit"), data = datTrain)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Model6 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped + pop + lagDevelopment * distance_highways + NAME,
family = binomial(link = "logit"), data = datTrain)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# New logistic regression model including distance to proposed highway
Model2031 <- glm(development_change ~ wetlands + forest + farm + otherUndeveloped +
pop + lagDevelopment + distance_highways + distance_new_highway + NAME,
family = binomial(link = "logit"),
data = dat_2021)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
summary(Model1)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped, family = binomial(link = "logit"), data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.0999 0.3545 -14.385 < 2e-16 ***
## wetlands 0.8804 1.0679 0.824 0.4097
## forest 1.8923 0.3729 5.075 3.87e-07 ***
## farm 0.8528 0.3816 2.235 0.0254 *
## otherUndeveloped 2.4438 0.3629 6.735 1.64e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2795.0 on 9738 degrees of freedom
## Residual deviance: 2624.7 on 9734 degrees of freedom
## AIC: 2634.7
##
## Number of Fisher Scoring iterations: 7
summary(Model2)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + lagDevelopment, family = binomial(link = "logit"),
## data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.459e+00 3.560e-01 -12.523 <2e-16 ***
## wetlands 2.639e+00 1.088e+00 2.425 0.0153 *
## forest 3.621e+00 3.810e-01 9.506 <2e-16 ***
## farm 3.493e+00 3.954e-01 8.835 <2e-16 ***
## otherUndeveloped 4.505e+00 3.743e-01 12.038 <2e-16 ***
## lagDevelopment -9.438e-04 6.088e-05 -15.504 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2795.0 on 9738 degrees of freedom
## Residual deviance: 2004.4 on 9733 degrees of freedom
## AIC: 2016.4
##
## Number of Fisher Scoring iterations: 9
summary(Model3)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + lagDevelopment + pop, family = binomial(link = "logit"),
## data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.326e+00 4.727e-01 -11.266 < 2e-16 ***
## wetlands 3.211e+00 1.116e+00 2.878 0.004 **
## forest 4.249e+00 4.561e-01 9.316 < 2e-16 ***
## farm 4.169e+00 4.753e-01 8.771 < 2e-16 ***
## otherUndeveloped 5.142e+00 4.522e-01 11.369 < 2e-16 ***
## lagDevelopment -8.898e-04 6.106e-05 -14.573 < 2e-16 ***
## pop 8.527e-04 2.105e-04 4.050 5.13e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2795.0 on 9738 degrees of freedom
## Residual deviance: 1991.6 on 9732 degrees of freedom
## AIC: 2005.6
##
## Number of Fisher Scoring iterations: 9
summary(Model4)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + lagDevelopment + pop + NAME, family = binomial(link = "logit"),
## data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.569e+00 5.030e-01 -11.071 < 2e-16 ***
## wetlands 3.248e+00 1.112e+00 2.920 0.003499 **
## forest 4.202e+00 4.450e-01 9.442 < 2e-16 ***
## farm 4.258e+00 4.672e-01 9.114 < 2e-16 ***
## otherUndeveloped 5.029e+00 4.414e-01 11.393 < 2e-16 ***
## lagDevelopment -8.855e-04 6.176e-05 -14.337 < 2e-16 ***
## pop 7.563e-04 2.224e-04 3.400 0.000673 ***
## NAMECaldwell -7.407e-01 4.999e-01 -1.482 0.138436
## NAMEHays 3.773e-02 2.673e-01 0.141 0.887752
## NAMETravis 3.069e-01 2.417e-01 1.270 0.204193
## NAMEWilliamson 7.161e-01 2.439e-01 2.936 0.003326 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2794.5 on 9731 degrees of freedom
## Residual deviance: 1967.1 on 9721 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 1989.1
##
## Number of Fisher Scoring iterations: 9
summary(Model5)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + lagDevelopment + pop + distance_highways +
## NAME, family = binomial(link = "logit"), data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.343e+00 5.042e-01 -10.596 < 2e-16 ***
## wetlands 3.086e+00 1.113e+00 2.773 0.00556 **
## forest 4.306e+00 4.440e-01 9.698 < 2e-16 ***
## farm 4.128e+00 4.661e-01 8.857 < 2e-16 ***
## otherUndeveloped 4.978e+00 4.407e-01 11.297 < 2e-16 ***
## lagDevelopment -8.055e-04 6.327e-05 -12.731 < 2e-16 ***
## pop 6.725e-04 2.306e-04 2.916 0.00355 **
## distance_highways -1.723e-04 3.507e-05 -4.914 8.94e-07 ***
## NAMECaldwell -8.363e-01 5.008e-01 -1.670 0.09491 .
## NAMEHays 2.365e-01 2.696e-01 0.877 0.38047
## NAMETravis 4.099e-01 2.426e-01 1.689 0.09114 .
## NAMEWilliamson 7.150e-01 2.440e-01 2.931 0.00338 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2794.5 on 9731 degrees of freedom
## Residual deviance: 1936.9 on 9720 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 1960.9
##
## Number of Fisher Scoring iterations: 9
summary(Model6)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + pop + lagDevelopment * distance_highways +
## NAME, family = binomial(link = "logit"), data = datTrain)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.754e+00 5.162e-01 -11.148 < 2e-16 ***
## wetlands 3.086e+00 1.112e+00 2.775 0.005524 **
## forest 4.231e+00 4.428e-01 9.554 < 2e-16 ***
## farm 4.086e+00 4.654e-01 8.780 < 2e-16 ***
## otherUndeveloped 4.930e+00 4.398e-01 11.210 < 2e-16 ***
## pop 7.309e-04 2.248e-04 3.251 0.001150 **
## lagDevelopment -5.368e-04 8.851e-05 -6.064 1.32e-09 ***
## distance_highways 5.880e-05 6.831e-05 0.861 0.389353
## NAMECaldwell -8.975e-01 5.002e-01 -1.794 0.072794 .
## NAMEHays 2.524e-01 2.696e-01 0.936 0.349315
## NAMETravis 3.927e-01 2.421e-01 1.622 0.104743
## NAMEWilliamson 6.825e-01 2.433e-01 2.806 0.005021 **
## lagDevelopment:distance_highways -1.287e-07 3.640e-08 -3.537 0.000405 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2794.5 on 9731 degrees of freedom
## Residual deviance: 1922.7 on 9719 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 1948.7
##
## Number of Fisher Scoring iterations: 11
summary(Model2031)
##
## Call:
## glm(formula = development_change ~ wetlands + forest + farm +
## otherUndeveloped + pop + lagDevelopment + distance_highways +
## distance_new_highway + NAME, family = binomial(link = "logit"),
## data = dat_2021)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.408e-01 3.048e-01 2.431 0.01507 *
## wetlands1 -2.280e+00 1.024e+00 -2.225 0.02606 *
## forest1 -4.012e+00 4.034e-01 -9.946 < 2e-16 ***
## farm1 -1.865e+01 3.699e+02 -0.050 0.95979
## otherUndeveloped1 -5.154e+00 5.999e-01 -8.591 < 2e-16 ***
## pop -1.900e-03 1.592e-04 -11.932 < 2e-16 ***
## lagDevelopment -6.186e-04 9.904e-05 -6.246 4.21e-10 ***
## distance_highways -4.672e-05 2.560e-05 -1.825 0.06803 .
## distance_new_highway -2.575e-05 6.193e-06 -4.158 3.21e-05 ***
## NAMECaldwell -1.904e-01 5.058e-01 -0.377 0.70653
## NAMEHays 6.711e-01 2.585e-01 2.596 0.00943 **
## NAMETravis -2.895e-01 2.453e-01 -1.180 0.23790
## NAMEWilliamson 1.092e-01 2.770e-01 0.394 0.69328
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 3896.8 on 13886 degrees of freedom
## Residual deviance: 1989.1 on 13874 degrees of freedom
## (7 observations deleted due to missingness)
## AIC: 2015.1
##
## Number of Fisher Scoring iterations: 20
library(ggplot2)
library(dplyr)
data.frame(
Model = c("Model1", "Model2", "Model3", "Model4", "Model5", "Model6","Model2031"),
AIC = c(Model1$aic, Model2$aic, Model3$aic, Model4$aic, Model5$aic, Model6$aic, Model2031$aic)
) %>%
ggplot() +
geom_bar(aes(x = Model, y = AIC), stat = "identity", fill = "skyblue") +
theme_minimal() +
labs(title = "AIC Comparison of Urban Growth Models",
y = "Akaike Information Criterion (AIC)")
testSetProbs <- data.frame(
class = datTest$development_change,
probs = predict(Model6, datTest, type = "response")
)
ggplot(testSetProbs, aes(probs)) +
geom_density(aes(fill=class), alpha=0.5) +
scale_fill_manual(values = palette2,
labels=c("No Change","New Development")) +
labs(title = "Histogram of Test Set Predicted Probabilities",
x = "Predicted Probabilities", y = "Density") +
theme_minimal()
# Confusion matrix, Example on MODEL 6
# Fix the NA issue by reassigning directly from datTest
# Step 1: Recode reference class directly from datTest
testSetProbs$class <- as.numeric(datTest$development_change == "New Development")
# Step 2: Create predClass based on your threshold
testSetProbs$predClass <- ifelse(testSetProbs$probs > 0.05, 1, 0)
# Step 3: Convert both to factors with same levels
testSetProbs <- testSetProbs %>%
mutate(
class = factor(class, levels = c(0,1)),
predClass = factor(predClass, levels = c(0,1))
)
# Step 4: Confusion matrix
caret::confusionMatrix(
reference = testSetProbs$class,
data = testSetProbs$predClass,
positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 3395 22
## 1 656 100
##
## Accuracy : 0.8375
## 95% CI : (0.826, 0.8486)
## No Information Rate : 0.9708
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1869
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.81967
## Specificity : 0.83806
## Pos Pred Value : 0.13228
## Neg Pred Value : 0.99356
## Prevalence : 0.02924
## Detection Rate : 0.02396
## Detection Prevalence : 0.18116
## Balanced Accuracy : 0.82887
##
## 'Positive' Class : 1
##
# Recode reference values to 1 and 0
testSetProbs <- testSetProbs %>%
mutate(class = as.numeric(datTest$development_change == "New Development"))
# Plot the ROC curve
library(ggplot2)
library(plotROC)
ggplot(testSetProbs, aes(d = class, m = probs)) +
geom_roc(n.cuts = 50, labels = FALSE) +
style_roc(theme = theme_grey) +
geom_abline(slope = 1, intercept = 0, size = 1.5, color = 'grey') +
theme_minimal() +
labs(title = "ROC Curve - Austin Urban Development Model")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Load required libraries
library(dplyr)
library(sf)
library(kableExtra)
library(tidyr)
library(ggplot2)
# Create dat_2011_preds with predictions and classification labels
dat_2011_preds <- dat_2011 %>%
mutate(
development_change = case_when(
development_change == "New Development" ~ 1,
development_change == "No Change" ~ 0
),
probs = predict(Model6, ., type = "response"),
Threshold_5_Pct = as.numeric(probs >= 0.05),
Threshold_17_Pct = as.numeric(probs >= 0.17)
) %>%
mutate(
confResult_05 = case_when(
Threshold_5_Pct == 0 & development_change == 0 ~ "True_Negative",
Threshold_5_Pct == 1 & development_change == 1 ~ "True_Positive",
Threshold_5_Pct == 0 & development_change == 1 ~ "False_Negative",
Threshold_5_Pct == 1 & development_change == 0 ~ "False_Positive"
),
confResult_17 = case_when(
Threshold_17_Pct == 0 & development_change == 0 ~ "True_Negative",
Threshold_17_Pct == 1 & development_change == 1 ~ "True_Positive",
Threshold_17_Pct == 0 & development_change == 1 ~ "False_Negative",
Threshold_17_Pct == 1 & development_change == 0 ~ "False_Positive"
)
) %>%
st_as_sf()
# Inspect basic structure and values
table(dat_2011_preds$Threshold_5_Pct)
##
## 0 1
## 11319 2586
str(dat_2011_preds$Threshold_5_Pct)
## num [1:13912] 0 0 0 0 0 0 0 0 0 0 ...
table(dat_2011$development_change)
##
## No Change New Development
## 13473 439
str(dat_2011$development_change)
## Factor w/ 2 levels "No Change","New Development": 1 1 1 1 1 1 1 1 1 1 ...
# Confusion summary table by county and threshold
dat_2011_preds %>%
as.data.frame() %>%
dplyr::select(NAME, confResult_05, confResult_17) %>%
pivot_longer(cols = starts_with("confResult"),
names_to = "Model_Type",
values_to = "Confusion_Result") %>%
group_by(NAME, Model_Type, Confusion_Result) %>%
tally() %>%
pivot_wider(names_from = Confusion_Result,
values_from = n,
values_fill = list(n = 0)) %>%
mutate(
TN_Rate_Specificity = 100 * (True_Negative / (True_Negative + False_Positive)),
TP_Rate_Sensitivity = 100 * (True_Positive / (True_Positive + False_Negative))
) %>%
dplyr::select(NAME, Model_Type, TN_Rate_Specificity, TP_Rate_Sensitivity) %>%
kable() %>%
kable_styling(full_width = FALSE)
| NAME | Model_Type | TN_Rate_Specificity | TP_Rate_Sensitivity |
|---|---|---|---|
| Bastrop | confResult_05 | 92.11506 | 47.36842 |
| Bastrop | confResult_17 | 99.86464 | 10.52632 |
| Caldwell | confResult_05 | 99.61089 | 0.00000 |
| Caldwell | confResult_17 | 100.00000 | 0.00000 |
| Hays | confResult_05 | 81.89655 | 92.75362 |
| Hays | confResult_17 | 95.78040 | 52.17391 |
| Travis | confResult_05 | 68.83245 | 85.38012 |
| Travis | confResult_17 | 92.18338 | 45.02924 |
| Williamson | confResult_05 | 81.61680 | 87.09677 |
| Williamson | confResult_17 | 93.35443 | 60.00000 |
| NA | confResult_05 | NaN | NaN |
| NA | confResult_17 | NaN | NaN |
ggplot() +
geom_sf(data = dat_2011_preds %>%
st_centroid() %>%
dplyr::select(confResult_05, confResult_17, geometry) %>%
tidyr::pivot_longer(cols = starts_with("confResult"),
names_to = "Threshold",
values_to = "Classification") %>%
filter(!is.na(Classification)),
aes(colour = Classification)) +
facet_wrap(~Threshold) +
scale_colour_manual(
values = c(
"False_Negative" = "red",
"False_Positive" = "yellow",
"True_Negative" = "blue",
"True_Positive" = "gray"
),
name = "Prediction Result"
) +
labs(title = "Spatial Distribution of Prediction Accuracy by Threshold") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
# Prepare test set predictions for Model2031
dat_2021_preds <- dat_2021 %>%
mutate(
development_change = case_when(
development_change == "New Development" ~ 1,
development_change == "No Change" ~ 0
),
probs = predict(Model2031, ., type = "response"),
Threshold_5_Pct = as.numeric(probs >= 0.05),
Threshold_17_Pct = as.numeric(probs >= 0.17)
) %>%
mutate(
confResult_05 = case_when(
Threshold_5_Pct == 0 & development_change == 0 ~ "True_Negative",
Threshold_5_Pct == 1 & development_change == 1 ~ "True_Positive",
Threshold_5_Pct == 0 & development_change == 1 ~ "False_Negative",
Threshold_5_Pct == 1 & development_change == 0 ~ "False_Positive"
),
confResult_17 = case_when(
Threshold_17_Pct == 0 & development_change == 0 ~ "True_Negative",
Threshold_17_Pct == 1 & development_change == 1 ~ "True_Positive",
Threshold_17_Pct == 0 & development_change == 1 ~ "False_Negative",
Threshold_17_Pct == 1 & development_change == 0 ~ "False_Positive"
)
) %>%
st_as_sf()
# Recode for confusion matrix
testSetProbs_2031 <- dat_2021_preds %>%
st_drop_geometry() %>%
mutate(
class = factor(development_change, levels = c(0,1)),
predClass = factor(Threshold_5_Pct, levels = c(0,1))
)
caret::confusionMatrix(
reference = testSetProbs_2031$class,
data = testSetProbs_2031$predClass,
positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 12152 18
## 1 1296 421
##
## Accuracy : 0.9054
## 95% CI : (0.9004, 0.9102)
## No Information Rate : 0.9684
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.3582
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.95900
## Specificity : 0.90363
## Pos Pred Value : 0.24520
## Neg Pred Value : 0.99852
## Prevalence : 0.03161
## Detection Rate : 0.03032
## Detection Prevalence : 0.12364
## Balanced Accuracy : 0.93131
##
## 'Positive' Class : 1
##
# Make sure to load libraries
library(ggplot2)
library(dplyr)
# Generate predictions and attach to test set
testSetProbs_2031 <- data.frame(
class = dat_2021$development_change,
probs = predict(Model2031, dat_2021, type = "response")
)
# Plot density of predicted probabilities by true class
ggplot(testSetProbs_2031, aes(x = probs)) +
geom_density(aes(fill = class), alpha = 0.5) +
scale_fill_manual(values = palette2,
labels = c("No Change", "New Development")) +
labs(
title = "Histogram of test set predicted probabilities",
x = "Predicted Probabilities",
y = "Density"
) +
theme_minimal()
## Warning: Removed 7 rows containing non-finite outside the scale range
## (`stat_density()`).
library(plotROC)
# Clean dataset before plotting ROC
testSetProbs_2031_clean <- testSetProbs_2031 %>%
filter(!is.na(class), !is.na(probs))
testSetProbs_2031_clean <- testSetProbs_2031_clean %>%
mutate(
d = ifelse(class == "New Development", 1,
ifelse(class == "No Change", 0, NA_real_))
) %>%
filter(!is.na(d)) # just in case anything slipped through
library(plotROC)
ggplot(testSetProbs_2031_clean, aes(d = d, m = probs)) +
geom_roc(n.cuts = 50, labels = FALSE) +
style_roc(theme = theme_minimal()) +
geom_abline(slope = 1, intercept = 0, size = 1.2, color = "gray") +
labs(title = "ROC Curve - Model2031: Urban Development Forecast (Austin 2031)")
dat_2021_preds %>%
as.data.frame() %>%
dplyr::select(NAME, confResult_05, confResult_17) %>%
pivot_longer(cols = starts_with("confResult"),
names_to = "Model_Type",
values_to = "Confusion_Result") %>%
group_by(NAME, Model_Type, Confusion_Result) %>%
tally() %>%
pivot_wider(names_from = Confusion_Result,
values_from = n,
values_fill = list(n = 0)) %>%
mutate(
TN_Rate_Specificity = 100 * (True_Negative / (True_Negative + False_Positive)),
TP_Rate_Sensitivity = 100 * (True_Positive / (True_Positive + False_Negative))
) %>%
dplyr::select(NAME, Model_Type, TN_Rate_Specificity, TP_Rate_Sensitivity) %>%
kable() %>%
kable_styling(full_width = FALSE)
| NAME | Model_Type | TN_Rate_Specificity | TP_Rate_Sensitivity |
|---|---|---|---|
| Bastrop | confResult_05 | 97.29272 | 100.00000 |
| Bastrop | confResult_17 | 97.36041 | 97.36842 |
| Caldwell | confResult_05 | 98.27682 | 83.33333 |
| Caldwell | confResult_17 | 99.11062 | 33.33333 |
| Hays | confResult_05 | 92.69510 | 97.10145 |
| Hays | confResult_17 | 94.41924 | 94.20290 |
| Travis | confResult_05 | 77.92509 | 92.98246 |
| Travis | confResult_17 | 88.66424 | 70.76023 |
| Williamson | confResult_05 | 89.69191 | 98.06452 |
| Williamson | confResult_17 | 93.66542 | 87.09677 |
| NA | confResult_05 | NaN | NaN |
| NA | confResult_17 | NaN | NaN |
ggplot() +
geom_sf(data = dat_2021_preds %>%
st_centroid() %>%
dplyr::select(confResult_05, confResult_17, geometry) %>%
tidyr::pivot_longer(cols = starts_with("confResult"),
names_to = "Threshold",
values_to = "Classification") %>%
filter(!is.na(Classification)),
aes(colour = Classification)) +
facet_wrap(~Threshold) +
scale_colour_manual(
values = c(
"False_Negative" = "red",
"False_Positive" = "yellow",
"True_Negative" = "blue",
"True_Positive" = "gray"
),
name = "Prediction Result"
) +
labs(title = "Model2031: Spatial Distribution of Prediction Accuracy") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
dat_2031_preds <- dat_2021 %>%
mutate(
probs = predict(Model2031, ., type = "response"),
Prediction = as.factor(ifelse(probs >= 0.17, 1, 0))
) %>%
st_as_sf()
ggplot(data = dat_2031_preds) +
geom_point(aes(x = xyC(dat_2031_preds)[,1],
y = xyC(dat_2031_preds)[,2],
colour = Prediction)) +
geom_sf(data = studyAreaCounties, fill = "transparent") +
labs(title = "Development Predictions - 2031") +
theme_void()
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
## Warning: st_centroid assumes attributes are constant over geometries
dat_2031_preds %>%
as.data.frame() %>%
filter(Prediction == 1) %>%
tally() %>%
rename(total_cells = n) %>%
mutate(
total_area_m = total_cells * res(lc_2021_rs)[1],
total_km2 = total_area_m / 1e6,
total_mi2 = total_km2 * 0.386102
) %>%
kable() %>%
kable_styling()
| total_cells | total_area_m | total_km2 | total_mi2 |
|---|---|---|---|
| 1139 | 1025100 | 1.0251 | 0.3957932 |
dat_2031_preds %>%
as.data.frame() %>%
filter(Prediction == 1) %>%
group_by(NAME) %>%
tally() %>%
rename(total_cells = n) %>%
mutate(
total_area_m = total_cells * res(lc_2021_rs)[1],
total_km2 = total_area_m / 1e6,
total_mi2 = total_km2 * 0.386102
) %>%
kable() %>%
kable_styling()
| NAME | total_cells | total_area_m | total_km2 | total_mi2 |
|---|---|---|---|---|
| Bastrop | 115 | 103500 | 0.1035 | 0.0399616 |
| Caldwell | 18 | 16200 | 0.0162 | 0.0062549 |
| Hays | 188 | 169200 | 0.1692 | 0.0653285 |
| Travis | 463 | 416700 | 0.4167 | 0.1608887 |
| Williamson | 355 | 319500 | 0.3195 | 0.1233596 |
dat_2031_preds %>%
as.data.frame() %>%
filter(Prediction == 1) %>%
dplyr::select(farm, otherUndeveloped, forest, wetlands, NAME) %>%
pivot_longer(cols = -NAME, names_to = "Land_Cover_Type", values_to = "Value") %>%
mutate(Value = as.numeric(Value)) %>%
group_by(NAME, Land_Cover_Type) %>%
summarize(total_cells = sum(Value, na.rm = TRUE), .groups = "drop") %>%
mutate(
total_area_m = total_cells * res(lc_2021_rs)[1],
total_km2 = total_area_m / 1e6,
total_mi2 = total_km2 * 0.386102
) %>%
kable() %>%
kable_styling()
| NAME | Land_Cover_Type | total_cells | total_area_m | total_km2 | total_mi2 |
|---|---|---|---|---|---|
| Bastrop | farm | 115 | 103500 | 0.1035 | 0.0399616 |
| Bastrop | forest | 115 | 103500 | 0.1035 | 0.0399616 |
| Bastrop | otherUndeveloped | 115 | 103500 | 0.1035 | 0.0399616 |
| Bastrop | wetlands | 115 | 103500 | 0.1035 | 0.0399616 |
| Caldwell | farm | 18 | 16200 | 0.0162 | 0.0062549 |
| Caldwell | forest | 18 | 16200 | 0.0162 | 0.0062549 |
| Caldwell | otherUndeveloped | 18 | 16200 | 0.0162 | 0.0062549 |
| Caldwell | wetlands | 18 | 16200 | 0.0162 | 0.0062549 |
| Hays | farm | 188 | 169200 | 0.1692 | 0.0653285 |
| Hays | forest | 188 | 169200 | 0.1692 | 0.0653285 |
| Hays | otherUndeveloped | 188 | 169200 | 0.1692 | 0.0653285 |
| Hays | wetlands | 188 | 169200 | 0.1692 | 0.0653285 |
| Travis | farm | 463 | 416700 | 0.4167 | 0.1608887 |
| Travis | forest | 463 | 416700 | 0.4167 | 0.1608887 |
| Travis | otherUndeveloped | 463 | 416700 | 0.4167 | 0.1608887 |
| Travis | wetlands | 463 | 416700 | 0.4167 | 0.1608887 |
| Williamson | farm | 355 | 319500 | 0.3195 | 0.1233596 |
| Williamson | forest | 355 | 319500 | 0.3195 | 0.1233596 |
| Williamson | otherUndeveloped | 355 | 319500 | 0.3195 | 0.1233596 |
| Williamson | wetlands | 355 | 319500 | 0.3195 | 0.1233596 |